"""
需求：爬取京东服装信息
url = 'https://search.jd.com/Search?keyword=女装&page=1'
user-agent: 浏览器user-agent校验
cookie：cookie检查
"""
import requests
from lxml import etree
import time
# HTTP request headers sent with every search-page request below.
# Per the module docstring, the site checks both the User-Agent and the Cookie.
headers = {
    # Desktop Chrome 131 on Windows 10 user-agent string.
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    # NOTE(review): this looks like a cookie captured from a logged-in browser
    # session (it carries `pin`, `thor`, `TrackID`, ...). It will presumably
    # expire, and committing it exposes account credentials -- consider loading
    # it from an environment variable or a local, untracked file instead.
    'Cookie': '__jdv=76161171|direct|-|none|-|1733990478355; __jdu=17339904783552024338363; areaId=7; shshshfpa=7c25d2ee-54ed-c004-6052-9860a2bf2a60-1725010601; shshshfpx=7c25d2ee-54ed-c004-6052-9860a2bf2a60-1725010601; TrackID=1pDAXowrj7hrXHIDlmiBXCkYfJ2nMhvhSOC1aaUNnOlaBLxBlaZOAJ7fx6R9WZ2qGeen99dL96U0THqcimkHEdY4kON_HabOo5rMDUqpmWVM; thor=246E791D9B6F5F4351FC83947C4768919D9A56E1D1847EB05C95AE30F49EDCD8B60FFBD1EFDC40CB03C6CA7F13698FC2DC6A74D3B6D321FD2397B83716B2B1112B988F783D48752F4396B255A74D4B42B68BC35DE827AD46BFD7D69D8EB0A0EF28BE181AFFDCEE5B6E8A1D5B3A20958559FF766E5C1187590EB2EB8CF703437F; light_key=AASBKE7rOxgWQziEhC_QY6yartbwMaldNsHmLvgM9gHSd9VmqV6cWlv1yLaWuPN5A4cUurVA; pinId=CiS1j6Rg5Fk; pin=welunn; unick=8a5bs9jca24joh; ceshi3.com=000; _tp=wjz3%2BO2iOXaQ0eYwlazd7g%3D%3D; _pst=welunn; rkv=1.0; qrsc=3; ipLoc-djd=7-412-416-47178; jsavif=1; jsavif=1; 3AB9D23F7A4B3CSS=jdd03PGIJTL2F2S36HDCJGPEI3T6XQSKDZTDZ3OIX2Y4DCOPCCKGIB2AIC32Z7I3NQRFCO7RNEKPIFF4TW6O4PK5UOI5ZSEAAAAMTZT7NH3YAAAAADML2V7CFRL7AGIX; __jda=143920055.17339904783552024338363.1733990478.1734057766.1734311073.6; __jdc=143920055; flash=3_wGyIBdJZKU4RbUdsWFUyMggkTsiTNocbyu9cSJiLg3H91zGG5AXui6r5Q9MSWo3LksGOb-WeWxYBCnIhXKFz5L9UsC93svCC2Blu4VynM7SMrscfgfB5gD9Ef-HsXHyiy1qmFLL7eg2bZ1rQEvLOTw36droA_2KJX2JkCKjP; avif=1; __jdb=143920055.2.17339904783552024338363|6.1734311073; xapieid=jdd03PGIJTL2F2S36HDCJGPEI3T6XQSKDZTDZ3OIX2Y4DCOPCCKGIB2AIC32Z7I3NQRFCO7RNEKPIFF4TW6O4PK5UOI5ZSEAAAAMTZT7NH3YAAAAADML2V7CFRL7AGIX; shshshfpb=BApXS6AT3z_ZA03z-pqousNKb8RcZeZu6BmdAAS5p9xJ1MhKpzIC2; 3AB9D23F7A4B3C9B=PGIJTL2F2S36HDCJGPEI3T6XQSKDZTDZ3OIX2Y4DCOPCCKGIB2AIC32Z7I3NQRFCO7RNEKPIFF4TW6O4PK5UOI5ZSE'
}

# Crawl search-result pages 1 through 19 for the keyword "女装" and print the
# name, price, link and shop of every product listed on each page.
for i in range(1, 20):
    # Throttle between page requests (not between printed items): one pause
    # before every page after the first, regardless of how many items the
    # previous page yielded or whether its request failed.
    if i > 1:
        time.sleep(10)

    url = f'https://search.jd.com/Search?keyword=女装&page={i}'

    try:
        # A timeout keeps a single stalled connection from hanging the crawl.
        res = requests.get(url, headers=headers, timeout=10)
        res.raise_for_status()
    except requests.RequestException as exc:
        # Report the failed page and carry on with the next one instead of
        # aborting the whole run.
        print(f"第 {i} 页请求失败：{exc}")
        continue

    result = etree.HTML(res.text)
    # Page parsing with lxml XPath: each <li> under the result list is one product.
    root = result.xpath('//ul[@class="gl-warp clearfix"]/li')
    for li in root:
        # Each xpath() call returns a list of text nodes / attribute values;
        # join them so a missing field becomes an empty string.
        name = "".join(li.xpath('.//div[@class="p-name p-name-type-2"]/a/em/text()'))
        print("商品名称：", name)
        price = "".join(li.xpath('.//div[@class="p-price"]/strong/i/text()'))
        print("商品价格：", price)
        link = "".join(li.xpath('.//div[@class="p-name p-name-type-2"]/a/@href'))
        print("商品链接：", link)
        shop = "".join(li.xpath('.//div[@class="p-shop"]/span/a/text()'))
        # The shop name was previously extracted but never shown.
        print("店铺名称：", shop)
        print("*" * 80)